Code
library(tidyverse)
library(lubridate)library(tidyverse)
library(lubridate) download.file(url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
destfile = "data/time_series_covid19_confirmed_global.csv")time_series_confirmed <- read_csv("/home/ashossain_umass_edu/Lab_7/time_series_covid19_confirmed_global.csv")|>
rename(Province_State = "Province/State", Country_Region = "Country/Region")
time_series_confirmed_long <- time_series_confirmed |>
pivot_longer(-c(Province_State, Country_Region, Lat, Long),
names_to = "Date", values_to = "Confirmed")
time_series_confirmed_long$Date <- mdy(time_series_confirmed_long$Date)
time_series_confirmed_long|>
group_by(Country_Region, Date) |>
summarise(Confirmed = sum(Confirmed)) |>
filter (Country_Region == "US") |>
ggplot(aes(x = Date, y = Confirmed)) +
geom_point() +
geom_line() +
ggtitle("US COVID-19 Confirmed Cases")time_series_confirmed_long |>
group_by(Country_Region, Date) |>
summarise(Confirmed = sum(Confirmed)) |>
filter (Country_Region %in% c("China","France","Italy",
"Korea, South", "US")) |>
ggplot(aes(x = Date, y = Confirmed, color = Country_Region)) +
geom_point() +
geom_line() +
ggtitle("COVID-19 Confirmed Cases")time_series_confirmed_long_daily <-time_series_confirmed_long |>
group_by(Country_Region, Date) |>
summarise(Confirmed = sum(Confirmed)) |>
mutate(Daily = Confirmed - lag(Confirmed, default = first(Confirmed )))time_series_confirmed_long_daily |>
filter (Country_Region == "US") |>
ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
geom_point() +
ggtitle("COVID-19 Confirmed Cases")time_series_confirmed_long_daily |>
filter (Country_Region == "US") |>
ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
geom_line() +
ggtitle("COVID-19 Confirmed Cases")time_series_confirmed_long_daily |>
filter (Country_Region == "US") |>
ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
geom_smooth() +
ggtitle("COVID-19 Confirmed Cases")time_series_confirmed_long_daily |>
filter (Country_Region == "US") |>
ggplot(aes(x = Date, y = Daily, color = Country_Region)) +
geom_smooth(method = "gam", se = FALSE) +
ggtitle("COVID-19 Confirmed Cases")library("gganimate")
library("gifski")
theme_set(theme_bw())
time_series_deaths_confirmed <- read_csv("/home/ashossain_umass_edu/Lab_7/time_series_covid19_deaths_global.csv")|>
rename(Province_State = "Province/State", Country_Region = "Country/Region")
time_series_deaths_long <- time_series_deaths_confirmed |>
pivot_longer(-c(Province_State, Country_Region, Lat, Long),
names_to = "Date", values_to = "Confirmed")
time_series_deaths_long$Date <- mdy(time_series_deaths_long$Date)
daily_counts <- time_series_confirmed_long_daily |>
filter (Country_Region == "US")
p <- ggplot(daily_counts, aes(x = Date, y = Daily, color = Country_Region)) +
geom_point() +
ggtitle("Confirmed COVID-19 Cases") +
# gganimate lines
geom_point(aes(group = seq_along(Date))) +
transition_reveal(Date)
# make the animation
anim <- animate(p, renderer = gifski_renderer(), end_pause = 15)
anim_save("daily_counts_US.gif", animation = anim)library(tidyverse)
table1# A tibble: 6 × 4
country year cases population
<chr> <dbl> <dbl> <dbl>
1 Afghanistan 1999 745 19987071
2 Afghanistan 2000 2666 20595360
3 Brazil 1999 37737 172006362
4 Brazil 2000 80488 174504898
5 China 1999 212258 1272915272
6 China 2000 213766 1280428583
table2# A tibble: 12 × 4
country year type count
<chr> <dbl> <chr> <dbl>
1 Afghanistan 1999 cases 745
2 Afghanistan 1999 population 19987071
3 Afghanistan 2000 cases 2666
4 Afghanistan 2000 population 20595360
5 Brazil 1999 cases 37737
6 Brazil 1999 population 172006362
7 Brazil 2000 cases 80488
8 Brazil 2000 population 174504898
9 China 1999 cases 212258
10 China 1999 population 1272915272
11 China 2000 cases 213766
12 China 2000 population 1280428583
table3# A tibble: 6 × 3
country year rate
<chr> <dbl> <chr>
1 Afghanistan 1999 745/19987071
2 Afghanistan 2000 2666/20595360
3 Brazil 1999 37737/172006362
4 Brazil 2000 80488/174504898
5 China 1999 212258/1272915272
6 China 2000 213766/1280428583
table1 |>
mutate(rate = cases / population * 10000)# A tibble: 6 × 5
country year cases population rate
<chr> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan 1999 745 19987071 0.373
2 Afghanistan 2000 2666 20595360 1.29
3 Brazil 1999 37737 172006362 2.19
4 Brazil 2000 80488 174504898 4.61
5 China 1999 212258 1272915272 1.67
6 China 2000 213766 1280428583 1.67
table1 |>
group_by(year) |>
summarize(total_cases = sum(cases))# A tibble: 2 × 2
year total_cases
<dbl> <dbl>
1 1999 250740
2 2000 296920
ggplot(table1, aes(x = year, y = cases)) +
geom_line(aes(group = country), color = "grey50") +
geom_point(aes(color = country, shape = country)) +
scale_x_continuous(breaks = c(1999, 2000)) # x-axis breaks at 1999 and 2000billboard |>
pivot_longer(
cols = starts_with("wk"),
names_to = "week",
values_to = "rank"
)# A tibble: 24,092 × 5
artist track date.entered week rank
<chr> <chr> <date> <chr> <dbl>
1 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk1 87
2 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk2 82
3 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk3 72
4 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk4 77
5 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk5 87
6 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk6 94
7 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk7 99
8 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk8 NA
9 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk9 NA
10 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk10 NA
# ℹ 24,082 more rows
billboard |>
pivot_longer(
cols = starts_with("wk"),
names_to = "week",
values_to = "rank",
values_drop_na = TRUE
)# A tibble: 5,307 × 5
artist track date.entered week rank
<chr> <chr> <date> <chr> <dbl>
1 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk1 87
2 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk2 82
3 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk3 72
4 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk4 77
5 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk5 87
6 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk6 94
7 2 Pac Baby Don't Cry (Keep... 2000-02-26 wk7 99
8 2Ge+her The Hardest Part Of ... 2000-09-02 wk1 91
9 2Ge+her The Hardest Part Of ... 2000-09-02 wk2 87
10 2Ge+her The Hardest Part Of ... 2000-09-02 wk3 92
# ℹ 5,297 more rows
billboard_longer <- billboard |>
pivot_longer(
cols = starts_with("wk"),
names_to = "week",
values_to = "rank",
values_drop_na = TRUE
) |>
mutate(
week = parse_number(week)
)
billboard_longer |>
ggplot(aes(x = week, y = rank, group = track)) +
geom_line(alpha = 0.25) +
scale_y_reverse()df <- tribble(
~id, ~bp1, ~bp2,
"A", 100, 120,
"B", 140, 115,
"C", 120, 125
)
df |>
pivot_longer(
cols = bp1:bp2,
names_to = "measurement",
values_to = "value"
)# A tibble: 6 × 3
id measurement value
<chr> <chr> <dbl>
1 A bp1 100
2 A bp2 120
3 B bp1 140
4 B bp2 115
5 C bp1 120
6 C bp2 125
who2 |>
pivot_longer(
cols = !(country:year),
names_to = c("diagnosis", "gender", "age"),
names_sep = "_",
values_to = "count"
)# A tibble: 405,440 × 6
country year diagnosis gender age count
<chr> <dbl> <chr> <chr> <chr> <dbl>
1 Afghanistan 1980 sp m 014 NA
2 Afghanistan 1980 sp m 1524 NA
3 Afghanistan 1980 sp m 2534 NA
4 Afghanistan 1980 sp m 3544 NA
5 Afghanistan 1980 sp m 4554 NA
6 Afghanistan 1980 sp m 5564 NA
7 Afghanistan 1980 sp m 65 NA
8 Afghanistan 1980 sp f 014 NA
9 Afghanistan 1980 sp f 1524 NA
10 Afghanistan 1980 sp f 2534 NA
# ℹ 405,430 more rows
household |>
pivot_longer(
cols = !family,
names_to = c(".value", "child"),
names_sep = "_",
values_drop_na = TRUE
)# A tibble: 9 × 4
family child dob name
<int> <chr> <date> <chr>
1 1 child1 1998-11-26 Susan
2 1 child2 2000-01-29 Jose
3 2 child1 1996-06-22 Mark
4 3 child1 2002-07-11 Sam
5 3 child2 2004-04-05 Seth
6 4 child1 2004-10-10 Craig
7 4 child2 2009-08-27 Khai
8 5 child1 2000-12-05 Parker
9 5 child2 2005-02-28 Gracie
cms_patient_experience |>
distinct(measure_cd, measure_title)# A tibble: 6 × 2
measure_cd measure_title
<chr> <chr>
1 CAHPS_GRP_1 CAHPS for MIPS SSM: Getting Timely Care, Appointments, and Infor…
2 CAHPS_GRP_2 CAHPS for MIPS SSM: How Well Providers Communicate
3 CAHPS_GRP_3 CAHPS for MIPS SSM: Patient's Rating of Provider
4 CAHPS_GRP_5 CAHPS for MIPS SSM: Health Promotion and Education
5 CAHPS_GRP_8 CAHPS for MIPS SSM: Courteous and Helpful Office Staff
6 CAHPS_GRP_12 CAHPS for MIPS SSM: Stewardship of Patient Resources
cms_patient_experience |>
pivot_wider(
names_from = measure_cd,
values_from = prf_rate
)# A tibble: 500 × 9
org_pac_id org_nm measure_title CAHPS_GRP_1 CAHPS_GRP_2 CAHPS_GRP_3
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 0446157747 USC CARE MEDICA… CAHPS for MI… 63 NA NA
2 0446157747 USC CARE MEDICA… CAHPS for MI… NA 87 NA
3 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA 86
4 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA NA
5 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA NA
6 0446157747 USC CARE MEDICA… CAHPS for MI… NA NA NA
7 0446162697 ASSOCIATION OF … CAHPS for MI… 59 NA NA
8 0446162697 ASSOCIATION OF … CAHPS for MI… NA 85 NA
9 0446162697 ASSOCIATION OF … CAHPS for MI… NA NA 83
10 0446162697 ASSOCIATION OF … CAHPS for MI… NA NA NA
# ℹ 490 more rows
# ℹ 3 more variables: CAHPS_GRP_5 <dbl>, CAHPS_GRP_8 <dbl>, CAHPS_GRP_12 <dbl>
df <- tribble(
~id, ~measurement, ~value,
"A", "bp1", 100,
"A", "bp1", 102,
"A", "bp2", 120,
"B", "bp1", 140,
"B", "bp2", 115
)
df |>
pivot_wider(
names_from = measurement,
values_from = value
)# A tibble: 2 × 3
id bp1 bp2
<chr> <list> <list>
1 A <dbl [2]> <dbl [1]>
2 B <dbl [1]> <dbl [1]>
df |>
select(-measurement, -value) |>
distinct() |>
mutate(x = NA, y = NA, z = NA)# A tibble: 2 × 4
id x y z
<chr> <lgl> <lgl> <lgl>
1 A NA NA NA
2 B NA NA NA
df |>
pivot_wider(
names_from = measurement,
values_from = value
)# A tibble: 2 × 3
id bp1 bp2
<chr> <list> <list>
1 A <dbl [2]> <dbl [1]>
2 B <dbl [1]> <dbl [1]>
Instead of making a graph of 5 countries on the same graph as in the above example, use facet_wrap with scales=“free_y”.
p <- time_series_deaths_long |>
filter(Country_Region %in% c("US", "Canada", "Mexico", "Brazil", "Egypt",
"Ecuador", "India", "Netherlands", "Germany", "China")) |>
ggplot(aes(x = Date, y = Confirmed, color = Country_Region)) +
geom_line() +
facet_wrap(~ Country_Region, scales = "free_y") +
transition_reveal(Date) +
labs(title = "Cumulative Deaths Over Time: {frame_along}", y = "Deaths") +
theme_minimal()
anim <- animate(p, renderer = gifski_renderer(), end_pause = 15)
anim_save("deaths_by_country_facet.gif", animation = anim)Using the daily count of confirmed cases, make a single graph with 5 countries of your choosing.
daily_counts <- time_series_confirmed_long_daily |>
filter(Country_Region %in% c("US", "India", "Brazil", "Germany", "Mexico"))
# Step 2: Create the animated plot
p <- ggplot(daily_counts, aes(x = Date, y = Daily, color = Country_Region)) +
geom_line() + # Use line to better show daily trends
labs(
title = "Daily Confirmed COVID-19 Cases: {frame_along}",
y = "Daily Cases",
x = "Date",
color = "Country"
) +
transition_reveal(Date) +
theme_minimal()
# Step 3: Animate and save
anim <- animate(p, renderer = gifski_renderer(), end_pause = 15)
anim_save("daily_confirmed_5countries.gif", animation = anim)Plot the cumulative deaths in the US, Canada and Mexico (you will need to download time_series_covid19_deaths_global.csv)
deaths_raw <- read.csv("/home/ashossain_umass_edu/Lab_7/time_series_covid19_deaths_global.csv") |>
rename(Province_State = "Province.State", Country_Region = "Country.Region")
time_series_deaths_long <- time_series_deaths_confirmed |>
pivot_longer(
cols = -c(Province_State, Country_Region, Lat, Long),
names_to = "Date",
values_to = "Deaths"
)
# Convert Date to Date format
time_series_deaths_long$Date <- mdy(time_series_deaths_long$Date)
deaths_summary <- time_series_deaths_long |>
filter(Country_Region %in% c("US", "Canada", "Mexico")) |>
group_by(Country_Region, Date) |>
summarise(Cumulative_Deaths = sum(Deaths), .groups = "drop")
p <- ggplot(deaths_summary, aes(x = Date, y = Cumulative_Deaths, color = Country_Region)) +
geom_line(size = 1) +
labs(
title = "Cumulative COVID-19 Deaths: US, Canada, Mexico",
x = "Date",
y = "Cumulative Deaths",
color = "Country"
)
pMake a graph with the countries of your choice using the daily deaths data
deaths_raw <- read.csv("/home/ashossain_umass_edu/Lab_7/time_series_covid19_deaths_global.csv") |>
rename(Province_State = "Province.State", Country_Region = "Country.Region")
time_series_deaths_long <- time_series_deaths_confirmed |>
pivot_longer(
cols = -c(Province_State, Country_Region, Lat, Long),
names_to = "Date",
values_to = "Deaths"
)
# Convert Date to Date format
time_series_deaths_long$Date <- mdy(time_series_deaths_long$Date)
deaths_summary <- time_series_deaths_long |>
filter(Country_Region %in% c("China", "Latvia", "Albania")) |>
group_by(Country_Region, Date) |>
summarise(Cumulative_Deaths = sum(Deaths), .groups = "drop")
ggplot(deaths_summary, aes(x = Date, y = Cumulative_Deaths, color = Country_Region)) +
geom_line(size = 1) +
labs(
title = "Cumulative COVID-19 Deaths: China, Latvia, Albania",
x = "Date",
y = "Cumulative Deaths",
color = "Country"
)Make an animation of your choosing (do not use a graph with geom_smooth)
deaths_data <- read_csv("/home/ashossain_umass_edu/Lab_7/time_series_covid19_deaths_global.csv") |>
rename(Province_State = "Province/State", Country_Region = "Country/Region")
# Convert to long format
deaths_long <- deaths_data |>
pivot_longer(
cols = -c(Province_State, Country_Region, Lat, Long),
names_to = "Date",
values_to = "Cumulative_Deaths"
) |>
mutate(Date = mdy(Date))
daily_deaths <- deaths_long |>
group_by(Country_Region, Date) |>
summarise(Cumulative_Deaths = sum(Cumulative_Deaths), .groups = "drop") |>
group_by(Country_Region) |>
arrange(Date) |>
mutate(Daily_Deaths = Cumulative_Deaths - lag(Cumulative_Deaths)) |>
ungroup()
selected_countries <- c("US", "India", "Brazil", "Germany", "Canada")
daily_deaths_filtered <- daily_deaths |>
filter(Country_Region %in% selected_countries, !is.na(Daily_Deaths))
p <- ggplot(daily_deaths_filtered, aes(x = Date, y = Daily_Deaths, color = Country_Region)) +
geom_line(size = 1) +
labs(
title = "Daily COVID-19 Deaths Over Time",
subtitle = "Date: {frame_along}",
x = "Date",
y = "Daily Deaths",
color = "Country"
) +
transition_reveal(Date) +
theme_minimal()
anim <- animate(p, renderer = gifski_renderer(), end_pause = 15)
anim_save("daily_deaths_animation.gif", animation = anim)